This is Python Tutorial¶

This is our first program in python: It is just started here

In [1]:
print("Hello World")
Hello World

$a=b+c$

Variables¶

In [1]:
x = 3
In [2]:
%whos # what variable is saved so far
Variable   Type    Data/Info
----------------------------
x          int     3
In [3]:
print(type(x))
<class 'int'>
In [4]:
x = 5.7
In [5]:
%whos
Variable   Type     Data/Info
-----------------------------
x          float    5.7
In [6]:
print(type(x))
<class 'float'>
In [7]:
abcd = 556.32
In [8]:
%whos
Variable   Type     Data/Info
-----------------------------
abcd       float    556.32
x          float    5.7
In [9]:
a,b,c,d,f = 3,5,6.0,7.2,-3
In [10]:
%whos
Variable   Type     Data/Info
-----------------------------
a          int      3
abcd       float    556.32
b          int      5
c          float    6.0
d          float    7.2
f          int      -3
x          float    5.7
In [11]:
del abcd
In [12]:
%whos
Variable   Type     Data/Info
-----------------------------
a          int      3
b          int      5
c          float    6.0
d          float    7.2
f          int      -3
x          float    5.7
In [13]:
print(abcd)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[13], line 1
----> 1 print(abcd)

NameError: name 'abcd' is not defined
In [14]:
c = 2+4j
In [15]:
print(type(c))
<class 'complex'>
In [16]:
s = "hellow how are you"
In [17]:
print(type(s))
<class 'str'>

Operators¶

In [18]:
%whos
Variable   Type       Data/Info
-------------------------------
a          int        3
b          int        5
c          complex    (2+4j)
d          float      7.2
f          int        -3
s          str        hellow how are you
x          float      5.7
In [19]:
sumOfaAndb = a+b #variables name should give you the look and feel what the data has
In [20]:
print(sumOfaAndb)
8
In [21]:
type(sumOfaAndb)
Out[21]:
int
In [22]:
type(a+d) #float is super set than int and Python follows the super set
Out[22]:
float
In [23]:
v = ((a+d)**3)/4
In [24]:
print(v)
265.30199999999996
In [26]:
s1 = "hellow"
s2 = "world"
s = s1+s2
print(s)
hellowworld
In [27]:
10//3 #quotient
Out[27]:
3
In [28]:
10/3
Out[28]:
3.3333333333333335
In [29]:
_ # stores the result of the above expression
Out[29]:
3.3333333333333335
In [30]:
3x = 5 #can a variable name start with a digit i.e. 3x? NO
  Cell In[30], line 1
    3x = 5
    ^
SyntaxError: invalid decimal literal
In [31]:
@y = 4 #can't start a variable name with @
  Cell In[31], line 1
    @y = 4
     ^
SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
In [32]:
*t=4 #can't start a variable name with * including special sign other then _
  Cell In[32], line 1
    *t=4
    ^
SyntaxError: starred assignment target must be in a list or tuple
In [33]:
_e = 6 # not recommended
In [34]:
startingTimeOfTheCourse = 2.0
In [35]:
%whos
Variable                  Type       Data/Info
----------------------------------------------
a                         int        3
b                         int        5
c                         complex    (2+4j)
d                         float      7.2
f                         int        -3
s                         str        hellowworld
s1                        str        hellow
s2                        str        world
startingTimeOfTheCourse   float      2.0
sumOfaAndb                int        8
v                         float      265.30199999999996
x                         float      5.7

Bool¶

In [36]:
a = True
b = True
c = False
In [37]:
%whos
Variable                  Type     Data/Info
--------------------------------------------
a                         bool     True
b                         bool     True
c                         bool     False
d                         float    7.2
f                         int      -3
s                         str      hellowworld
s1                        str      hellow
s2                        str      world
startingTimeOfTheCourse   float    2.0
sumOfaAndb                int      8
v                         float    265.30199999999996
x                         float    5.7
In [39]:
print(a and b)
print(a and c)
print(c and a)
True
False
False
In [41]:
d = a or c
print(d)
True
In [42]:
not(a)
Out[42]:
False
In [43]:
not(b)
Out[43]:
False
In [44]:
not(c)
Out[44]:
True
In [45]:
t = not(d)
In [46]:
type(t)
Out[46]:
bool
In [47]:
print(t)
False
In [48]:
not((a and b) or (c or d))
Out[48]:
False

Comparisons¶

In [49]:
print(2<3)
True
In [50]:
c = 2<3
print(type(c))
print(c)
<class 'bool'>
True
In [51]:
d = 3==4
In [52]:
print(d)
False
In [53]:
3==3.0
Out[53]:
True
In [54]:
x = 4
y = 9
z = 8.3
r = -3
In [55]:
(x<y) and (z<y) or (r==x)
Out[55]:
True
In [56]:
(r==x) and (x<y) or (z>y)
Out[56]:
False
In [61]:
(True or False) and False # and first and then or
Out[61]:
False
In [62]:
print((not(2!=3)and Ture)or(False and True))
False
In [63]:
print(round(4.556))
5
In [64]:
print(round(4.345))
4
In [67]:
print(round(4.556389,3))
4.556
In [68]:
divmod(22,10)
Out[68]:
(2, 2)
In [71]:
G = divmod(34,9)
In [72]:
type(G)
Out[72]:
tuple
In [73]:
print(G)
(3, 7)
In [74]:
G[0]
Out[74]:
3
In [75]:
G[1]
Out[75]:
7
In [76]:
34//9
Out[76]:
3
In [77]:
34%9 #remainder
Out[77]:
7
In [78]:
isinstance(3,int) # is object a type of sth?
Out[78]:
True
In [81]:
isinstance(3.4,(float,int))
Out[81]:
True
In [83]:
isinstance(2+3j,(int,float,str,complex))
Out[83]:
True
In [84]:
pow(2,4) # same with 2**4
Out[84]:
16
In [85]:
2**4
Out[85]:
16
In [86]:
pow(2,4,7) #2**4%7
Out[86]:
2
In [87]:
x = input("enter a number :")
enter a number :56
In [88]:
type(x)
Out[88]:
str
In [89]:
x = int(x) #change the type of x as integer
In [90]:
type(x)
Out[90]:
int
In [91]:
print(x-34)
22
In [92]:
a = float(input("Enter a real number :"))
Enter a real number :12.5
In [93]:
type(a)
Out[93]:
float
In [94]:
b = float(input("Enter a real number : "))
Enter a real number : abc
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[94], line 1
----> 1 b = float(input("Enter a real number : "))

ValueError: could not convert string to float: 'abc'
In [98]:
pow? #don't know how to use the function then add ?
  Cell In[98], line 1
    pow? #don't know how to use the function then add ?
       ^
SyntaxError: invalid syntax
In [99]:
help(input) #don't know how to use the function then use help()
Help on method raw_input in module ipykernel.kernelbase:

raw_input(prompt='') method of ipykernel.ipkernel.IPythonKernel instance
    Forward raw_input to frontends
    
    Raises
    ------
    StdinNotImplementedError if active frontend doesn't support stdin.

In [101]:
a = int(input())
b = int(input())
if a>b:
    print(a)
    print("I am still inside if condition")
print("I am outside the if condition") # diff block which not depend on if condition/ regardless of the result, should print this
10
45
I am outside the if condition
In [103]:
a = int(input())
b = int(input())
if a>b:
    print(a)
if b>a:
    print(b)
22
4
22
In [106]:
a = int(input())
b = int(input())
if a>b:
    print(a)
    print("if part")
else:
    print(b)
    print("else part")
10
10
10
else part
In [109]:
a = 10
b = 10
if a==b:
    print("Equal")
elif a>b:
    print("A")
else:
    print("B")
print("Not in if")
Equal
Not in if
In [111]:
a = int(input("Enter Marks :"))
if a >= 85:
    print("A Grade")
elif (a < 85) and (a >= 80): #write prenthesis makes it more readable
    print("A- Grade")
elif (a < 80) and (a >= 75):
    print("B Grade")
elif (a < 75) and (a >= 70):
    print("B- Grade")
else:
    print("Below Average")
Enter Marks :64
Below Average
In [112]:
#esle 안쓰고 else 사용하기
a = 3
if a>10:
    print(">10")
elif not(a>10):
    print("Else part")
Else part
In [114]:
a = int(input())
if a > 10:
    print(">10")
    print("Inside the top if")
    if a > 20:
        print(">20")
        print("Inside the nested if")
        if a>30:
            print(">30")
            print("inside the nested if of nested if")
        else:
            print("<=30")
            print("inside the else part of nested if of nested if")
    else:
        print("<=20")
        print("Inside the else part of nested if")
print("Outside all ifs")
25
>10
Inside the top if
>20
Inside the nested if
<=30
inside the else part of nested if of nested if
Outside all ifs
In [132]:
#single line comment
""" Multi line comment
User will enter a floating point number let say 238.915. 
Your task is to find out the integer portion before the point (in this case 238)
and then check if that integer portion is an even number or not
"""
x = float(input("Enter a real number :"))
y = round(x)
if x>0:
    if y>x:
        intPortion = y-1 #29.6
    else:
        intPortion = y
else:
    if y<x:
        intPortion = y+1
    else:
        intPortion = y

if intPortion%2 == 0:
    print("Evne")
else:
    print("Odd")
Enter a real number :-87.3
Odd
In [119]:
round(-9.3)
Out[119]:
-9
In [120]:
round(-9.6)
Out[120]:
-10
In [133]:
n = int(input())
i = 1
while (i < n):
    print(i**2)
    print("This is iteration number:", i)
    i += 1 #i = i+1
print("Loop done")
5
1
This is iteration number: 1
4
This is iteration number: 2
9
This is iteration number: 3
16
This is iteration number: 4
Loop done
In [134]:
n = 10
i = 1
while True:
    if i%9 == 0:
        print("Inside if")
        break
    else:
        print("Inside else")
        i = i+1
print("done")
Inside else
Inside else
Inside else
Inside else
Inside else
Inside else
Inside else
Inside else
Inside if
done
In [135]:
n = 10
i = 1
while True:
    if i%9 != 0:
        print("inside if")
        i +=1
        continue
    print("something")
    print("somethingelse")
    break
    
print("done")
inside if
inside if
inside if
inside if
inside if
inside if
inside if
inside if
something
somethingelse
done
In [137]:
L = []
for i in range(0,10,2): #0 start/10 end/ 2 step size
    print(i)
    L.append(i**2)
print(L)
0
2
4
6
8
[0, 4, 16, 36, 64]
In [139]:
S = {"apple", 4.9, "cherry"}
i = 1
for x in S: #as long as the x in S
    print(x)
    i += 1
    if i == 3:
        break
    else:
        pass
else:
    print("Loop terminates with success")
print("Out side the loop")
apple
4.9
Out side the loop
In [142]:
D = {"A":10, "B":-19, "C":"abc"}
for x in D:
    print(x, D[x])
A 10
B -19
C abc
In [148]:
""" Given a list of numbers i.e. [1,2,4,-5,7,9,3,2], make another list
that contains all the items in sorted order from min to max. i.e. your 
result will be another list like [-5,1,2,2,3,4,7,9]
"""
L = [1,2,4,-5,7,9,3,2]
for j in range(len(L)): #length of L
    m = L[j]
    idx = j
    c = j
    for i in range(j,len(L)):
        if L[i]<m:
            m = L[i]
            idx = c
        c += 1
    tmp = L[j]
    L[j] = m
    L[idx] = tmp
print(L)
[-5, 1, 2, 2, 3, 4, 7, 9]
In [150]:
#Refer stackoverflow answers
L = [1,2,4,-5,7,9,3,2]
m = L[0]
idx = 0
for i in L:
    if i<m:
        m = i
    idx += 1
print(idx, m)
8 -5

Functions¶

In [151]:
def printSuccess():
    print("I am done")
    print("send me another task")
In [152]:
printSuccess()
I am done
send me another task
In [153]:
3+8
Out[153]:
11
In [154]:
printSuccess()
I am done
send me another task
In [155]:
def printSuccess2():
    """This funcion is doing nothing except printing a message.
    That message is "hellow"
    """ #this is doc string #use doc string whenever use functions
    print("hellow")
In [161]:
help(printSuccess2)
Help on function printSuccess2 in module __main__:

printSuccess2()
    This funcion is doing nothing except printing a message.
    That message is "hellow"

In [162]:
printSuccess2()
hellow
In [163]:
def printMessage(msg):
    """The function prints the message supplied by the user
    or prints that msg is not in the form of string"""

    if isinstance(msg,str):
        print(msg)
    else:
        print("Your input argument is not string")
        print("Here is the type of what you have supplied :", type(msg))
In [164]:
help(printMessage)
Help on function printMessage in module __main__:

printMessage(msg)
    The function prints the message supplied by the user
    or prints that msg is not in the form of string

In [167]:
printMessage??
In [168]:
printMessage("This is the message")
This is the message
In [169]:
printMessage(23)
Your input argument is not string
Here is the type of what you have supplied : <class 'int'>
In [170]:
y = "hellow there"
printMessage(y)
hellow there
In [171]:
#multiple arguments
def mypow(a,b):
    """this function compute power just like builtin pow function"""
    c = a**b
    print(c)
In [172]:
mypow?
In [173]:
mypow(3,4)
81
In [174]:
def checkArgs(a,b,c):
    if isinstance(a,(int,float)) and isinstance(b,(int,float)) and isinstance(c,(int,float)):
        print((a+b+c)**2)
    else:
        print("Error: the input arguments are not of the expected types")
In [175]:
checkArgs(3,4,5)
144
In [176]:
checkArgs(3,4,"g")
Error: the input arguments are not of the expected types
In [177]:
checkArgs(3,4)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[177], line 1
----> 1 checkArgs(3,4)

TypeError: checkArgs() missing 1 required positional argument: 'c'
In [178]:
checkArgs(2,3,4,5)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[178], line 1
----> 1 checkArgs(2,3,4,5)

TypeError: checkArgs() takes 3 positional arguments but 4 were given
In [179]:
#Order of input arguments
def f(a,b,c):
    print("A is :", a)
    print("B is :", b)
    print("C is :", c)
In [181]:
#f(2,3,"game")
f(3,"game",2)
A is : 3
B is : game
C is : 2
In [183]:
#f(a = 2, b = 3, c ="game")
f(c = "game", a = 2, b = 3)
A is : 2
B is : 3
C is : game
In [191]:
#Function(return statement)
def myadd(a,b):
    sumValue = a+b
    return sumValue
In [192]:
d = myadd(2,3)
print(d)
5
In [193]:
variableOutSideTheFunction = 3
In [201]:
def g():
    variableOutSideTheFunction = 5
    print(variableOutSideTheFunction) # inside the function
In [203]:
g()
5
In [199]:
print(variableOutSideTheFunction) # outside the function
3
In [204]:
def g():
    variableOutSideTheFunction = 5
    #print(variableOutSideTheFunction) # inside the function
In [206]:
print(g()) #if print isn't set inside the function, then none will be result when print(that func()).
print(type(g()))
None
<class 'NoneType'>
In [215]:
def h():
    print("A")
    a = 3
    b = 5
    c = a+b
    print("something")
    return  # Like this if I dont return anything, it means exit the function right away
    print("B")
    print("C")
In [216]:
print(h())
A
something
None
In [217]:
def h():
    print("A")
    a = 3
    b = 5
    c = a+b
    print("something")
    return c
    print("B")
    print("C")
In [218]:
print(type(h()))
A
something
<class 'int'>
In [219]:
def r():
    a = 5
    b = 7
    d = "something"
    return a,b,d #can return multiple values
In [220]:
x,y,z = r()
print(x,y,z)
5 7 something
In [221]:
def myAddUniversal(*args):
    s = 0
    for i in range(len(args)):
        s += args[i] #s = s+args[i]
    return s
In [222]:
print(myAddUniversal(2,4,5))
11
In [223]:
def printAllVariableNamesAndValues(**args):# ** you'll receive key value pair list
    for x in args:
        print("Variable Name is :", x,"and Value is :", args[x])
In [224]:
printAllVariableNamesAndValues(a = 3, b = "B", c = "CCC", y = 6.7)
Variable Name is : a and Value is : 3
Variable Name is : b and Value is : B
Variable Name is : c and Value is : CCC
Variable Name is : y and Value is : 6.7
In [225]:
#default value
def gg(s=4):
    print(s) #once press shift+enter, s is assigned as 4
In [226]:
gg()
4
In [227]:
gg(56)
56
In [228]:
L = [1,2,3]
L2 = L
L2[0] = -9
print(L)
[-9, 2, 3]
In [229]:
def ff(L =[1,2]):
    for i in L:
        print(i)
In [230]:
L2 = [12,3,4]
ff()
1
2
In [231]:
ff(L2)
12
3
4
In [232]:
ff() #already default value is assigned as L = [1,2]
1
2
In [233]:
#Modules

import sys
sys.path.append('/Users/soyeonpark/ABC')
In [234]:
#import all functions
import my_universal_functions as myfs

#import one function
#from my_universal_functions import addAllNumerics
In [235]:
myfs.addAllNumerics??
In [237]:
c = myfs.addAllNumerics(2,3,4,6)
In [238]:
print(c)
15
In [239]:
myfs.myName
Out[239]:
'Python Cours'
In [240]:
""" Given a list of numbers i.e. [1,2,4,-5,7,9,3,2], make another list
that contains all the items in sorted order from min to max. i.e. your 
result will be another list like [-5,1,2,2,3,4,7,9]
"""
Out[240]:
' Given a list of numbers i.e. [1,2,4,-5,7,9,3,2], make another list\nthat contains all the items in sorted order from min to max. i.e. your \nresult will be another list like [-5,1,2,2,3,4,7,9]\n'
In [250]:
def findMin(L, startIndx):
    m = L[startIndx]
    idx = startIndx
    for i in range(startIndx,len(L)):
        x = L[i]
        if x<m:
            m = x
            idx = i
        else:
            pass
        i += 1
    return m, idx
In [245]:
a,b = findMin([2,3,4,0,9])
In [246]:
print(a,b)
0 3
In [247]:
def swapValues(L,idx1,idx2):
    tmp = L[idx1]
    L[idx1] = L[idx2]
    L[idx2] = tmp
    return L
In [249]:
L = [2,3,6,7]
L2 = swapValues(L,1,3)
print(L2)
[2, 7, 6, 3]
In [267]:
from my_universal_functions import checkIfNotNumeric
def sortList(L):
    if not(checkIfNotNumeric2(L)):
        print("Error: List does not contain numeric values")
        return
    else:
        c = 0
        for x in L:
            m, idx = findMin(L, c)
            L = swapValues(L,c,idx)
            c += 1
    return L
In [268]:
L2 = sortList([2,1,5,3,-8,17])
print(L2)
[-8, 1, 2, 3, 5, 17]
In [253]:
checkIfNotNumeric??
In [264]:
checkIfNotNumeric2([2,1,5,3,-8,17])
Out[264]:
True
In [263]:
def checkIfNotNumeric2(L):
    for x in L:
        if not(isinstance(x, (int, float))):
            return False
    return True
In [269]:
#String
s = "Python is a good language"
t = 'Its good for data science'
In [270]:
type(s)
Out[270]:
str
In [271]:
print(s)
Python is a good language
In [272]:
print("hellow", 12, "hellow2", 'who are you', 5.9)
hellow 12 hellow2 who are you 5.9
In [275]:
v = s + " " + t #str+str+str
print(v)
Python is a good language Its good for data science
In [282]:
price = 12
s = "The price of this book"
v = s + ' is: '+ str(price) #str+str+int X --> str(int) 
print(v)
print(s,"is:", price) #print() automatically add space between arguments/ in thic case, don't need to chage the type
The price of this book is: 12
The price of this book is: 12
In [284]:
#String(Multi line String)
a = """this is line 1
this is line 2
this is last line and this line is 3"""
print(a)
this is line 1
this is line 2
this is last line and this line is 3
In [286]:
print(""" The following options are available: 
            -a      :does nothing
            -b      :also does nothing
""")
 The following options are available: 
            -a      :does nothing
            -b      :also does nothing

In [288]:
s = "How are you and who are you"
print(s[5])
r
In [289]:
print(type(s[5]))
<class 'str'>
In [290]:
s[3:8]
Out[290]:
' are '
In [291]:
s[-1] #negative index is starting from right / -1 is the last letter
Out[291]:
'u'
In [293]:
s[-12:-3]
Out[293]:
' who are '
In [294]:
s[1] = "e"  # it's not possible. it's inmutable(unchanable) once the string is declared
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[294], line 1
----> 1 s[1] = "e"

TypeError: 'str' object does not support item assignment
In [295]:
s[0:12:2] # it skips with the step size 2 till 12(excluded) s[start:end:step]
Out[295]:
'Hwaeyu'
In [296]:
s
Out[296]:
'How are you and who are you'
In [297]:
s[:12]
Out[297]:
'How are you '
In [298]:
s[3:]
Out[298]:
' are you and who are you'
In [299]:
s[1:12]
Out[299]:
'ow are you '
In [300]:
s[::-1] # reverse the way
Out[300]:
'uoy era ohw dna uoy era woH'
In [301]:
print(len(a))
66
In [302]:
print(len(a[3:8]))
5
In [304]:
a = "    abc def     hgq   asdgeg" 
b = a.strip() #remove space in the beginning and end
print(b)
abc def     hgq   asdgeg
In [305]:
a = "ABC deFg ;; sadfa QF"
b = a.lower() #change all the characters into lower case
print(b)
abc defg ;; sadfa qf
In [306]:
c = a.upper() # change to upper case
print(c)
ABC DEFG ;; SADFA QF
In [307]:
d = a.replace(";","*") #replace the first one to the second one
print(d)
ABC deFg ** sadfa QF
In [308]:
d = a.replace(";","**&&^^%%")
print(d)
ABC deFg **&&^^%%**&&^^%% sadfa QF
In [309]:
d = a.replace(";;","two semi colons")
print(d)
ABC deFg two semi colons sadfa QF
In [310]:
a = "abc;def;hgydfa;yy23" # want to separate them
L = a.split(";") #split elements by the standard " "
print(L) #abc is one element
['abc', 'def', 'hgydfa', 'yy23']
In [311]:
L[1]
Out[311]:
'def'
In [313]:
#. and tap button --> appear all the functions that can be used
print(a.capitalize())
Abc;def;hgydfa;yy23
In [314]:
"abdAfadfGGQ".capitalize()
Out[314]:
'Abdafadfggq'
In [315]:
help(a.count)
Help on built-in function count:

count(...) method of builtins.str instance
    S.count(sub[, start[, end]]) -> int
    
    Return the number of non-overlapping occurrences of substring sub in
    string S[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

In [317]:
"abc" in "asdfsafsjflskfjabclskjf"
Out[317]:
True
In [318]:
"abc" == "abc" #can use == at string
Out[318]:
True
In [319]:
"abc" < "def" #what does it mean? --> following alphabet order in Python
Out[319]:
True
In [320]:
"$%" < "*&"
Out[320]:
True
In [322]:
"acd" not in "ackljlkfj"
Out[322]:
True
In [323]:
print("we are learning "string" here")
  Cell In[323], line 1
    print("we are learning "string" here")
          ^
SyntaxError: invalid syntax. Perhaps you forgot a comma?
In [325]:
print("we are learning \"string\"here") #use backslash
we are learning "string"here
In [326]:
print('we are learning "string" here') # or use single quot 
we are learning "string" here
In [327]:
print("we are \n now on another line") # for another line, use \n
we are 
 now on another line
In [328]:
print("we are \t now on another line")
we are 	 now on another line
In [329]:
print("c:\name\drive") # here \n acts like for another line
c:
ame\drive
In [330]:
print(r"c:\name\drive") # use r ahead to accept it as a raw string
c:\name\drive

Data Structures¶

In [331]:
#List
L = [1,3,4.9,"name",3]
#Tuple
T = (1,3,4.9,"name",3)
#Set
S = {1,3,4.9,"name",3}
#Dictionary
D = {23:"twothree", 'B':43, 'C':'CCD'}
In [334]:
print("The type of L is ", type(L))
print("The type of T is ", type(T))
print("The type of S is ", type(S))
print("The type of D is ", type(D))
The type of L is  <class 'list'>
The type of T is  <class 'tuple'>
The type of S is  <class 'set'>
The type of D is  <class 'dict'>
In [335]:
print(L[1])
print(T[1])
print(3 in S)
print(D[23])
3
3
True
twothree
In [336]:
print(D['B'])
43
In [337]:
S # no duplicate
Out[337]:
{1, 3, 4.9, 'name'}
In [338]:
L
Out[338]:
[1, 3, 4.9, 'name', 3]
In [339]:
L[1:3]
Out[339]:
[3, 4.9]
In [340]:
L[::-1]
Out[340]:
[3, 'name', 4.9, 3, 1]
In [341]:
T[:3]
Out[341]:
(1, 3, 4.9)
In [342]:
L = L + ["how", "are", 6, "you"] #add element
In [343]:
L
Out[343]:
[1, 3, 4.9, 'name', 3, 'how', 'are', 6, 'you']
In [344]:
L.append(6.8) #add element
In [345]:
L
Out[345]:
[1, 3, 4.9, 'name', 3, 'how', 'are', 6, 'you', 6.8]
In [348]:
T2 = ('a', 'b', 45)
T3 = T + T2  #combine tuples
In [349]:
T3
Out[349]:
(1, 3, 4.9, 'name', 3, 'a', 'b', 45)
In [350]:
S
Out[350]:
{1, 3, 4.9, 'name'}
In [351]:
S.add(56) #add element
In [352]:
S
Out[352]:
{1, 3, 4.9, 56, 'name'}
In [353]:
S.update({23,"game",1}) #add multiple elements
In [354]:
S
Out[354]:
{1, 23, 3, 4.9, 56, 'game', 'name'}
In [355]:
D
Out[355]:
{23: 'twothree', 'B': 43, 'C': 'CCD'}
In [356]:
D['newKey'] = "newValue"
In [357]:
D
Out[357]:
{23: 'twothree', 'B': 43, 'C': 'CCD', 'newKey': 'newValue'}
In [358]:
D2 = {"y":"YY", "z":10}
In [370]:
D3 = D + D2 #not possible to combine Dictionaries /But can update like D.update(D2)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[370], line 1
----> 1 D3 = D + D2 #not possible to combine Dictionaries
      3 D.update(D2)

TypeError: unsupported operand type(s) for +: 'dict' and 'dict'
In [361]:
L
Out[361]:
[1, 3, 4.9, 'name', 3, 'how', 'are', 6, 'you', 6.8]
In [362]:
del L[3] #delete element
In [363]:
L
Out[363]:
[1, 3, 4.9, 3, 'how', 'are', 6, 'you', 6.8]
In [364]:
S
Out[364]:
{1, 23, 3, 4.9, 56, 'game', 'name'}
In [365]:
S.remove('game')
In [366]:
S
Out[366]:
{1, 23, 3, 4.9, 56, 'name'}
In [367]:
D
Out[367]:
{23: 'twothree', 'B': 43, 'C': 'CCD', 'newKey': 'newValue'}
In [368]:
del D['C']
In [369]:
D
Out[369]:
{23: 'twothree', 'B': 43, 'newKey': 'newValue'}
In [371]:
#copy function
L
Out[371]:
[1, 3, 4.9, 3, 'how', 'are', 6, 'you', 6.8]
In [372]:
L2 = L
In [373]:
L2 #is saved in the same memory. so if we change one element in one list then the other is also changed
Out[373]:
[1, 3, 4.9, 3, 'how', 'are', 6, 'you', 6.8]
In [374]:
L2[2] = "four point nine"
In [375]:
L2
Out[375]:
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
In [376]:
L
Out[376]:
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
In [377]:
L2 = L.copy() # should use copy function to save different memory/ Same at Set, Dictionary (Tuple doesn't use it)
In [378]:
L2
Out[378]:
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
In [379]:
L
Out[379]:
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
In [380]:
L2[1] = 'one'
In [381]:
L2
Out[381]:
[1, 'one', 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
In [382]:
L
Out[382]:
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
In [383]:
L3 = L[1:5] #L3 is completely in new memory
In [384]:
L3
Out[384]:
[3, 'four point nine', 3, 'how']
In [385]:
L3[0] = "three"
In [386]:
L
Out[386]:
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
In [387]:
help(L.append)
Help on built-in function append:

append(object, /) method of builtins.list instance
    Append object to the end of the list.

In [388]:
L.clear?
In [389]:
L.pop?
In [390]:
L.reverse()
In [391]:
L
Out[391]:
[6.8, 'you', 6, 'are', 'how', 3, 'four point nine', 3, 1]
In [392]:
L[::-1]
Out[392]:
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
In [393]:
D.items?
In [394]:
L
Out[394]:
[6.8, 'you', 6, 'are', 'how', 3, 'four point nine', 3, 1]
In [395]:
T
Out[395]:
(1, 3, 4.9, 'name', 3)
In [396]:
S
Out[396]:
{1, 23, 3, 4.9, 56, 'name'}
In [397]:
D
Out[397]:
{23: 'twothree', 'B': 43, 'newKey': 'newValue'}
In [398]:
D2 = {'A':L, 'B':T, 'C':S, 'D':D} # one data structure can have the other structure as element
In [400]:
D2['A'][3] #at the Value of A, want to know the third position
Out[400]:
'are'
In [401]:
K = D2['D']
In [402]:
K
Out[402]:
{23: 'twothree', 'B': 43, 'newKey': 'newValue'}
In [403]:
for x in K:
    print(x,K[x])
23 twothree
B 43
newKey newValue
In [404]:
L3 = [L,T,D,23,"game"] # it's also possible
In [405]:
type(L3[2])
Out[405]:
dict
In [406]:
L3 = [x**2 for x in range(10)]
In [407]:
L3
Out[407]:
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
In [408]:
S3 = {x**2 for x in range(2,20,3)}
In [409]:
S3
Out[409]:
{4, 25, 64, 121, 196, 289}
In [414]:
"""Let say you are a teacher and you have different student 
records containing id fo a student and the marks list in each subject
where different students have taken different number of subjects .All
these records are in hard copy. You want to enter all the data in computer
and want to compute the average marks of each student and display"""

def getDataFromUser():
    D = {}
    while True:
        studentId = input("Enter student ID: ")
        marksList = input("Enter the marks by comma separated values: ")
        moreStudents = input('Enter "no" to quit insertion: ')
        if studentId in D:
            print(studentId, "is already inserted")
        else:
            D[studentId] = marksList.split(",")
        if moreStudents.lower() == "no":
            return D
In [415]:
studentData = getDataFromUser()
Enter student ID: 12
Enter the marks by comma separated values: 56,45,13
Enter "no" to quit insertion: asdf
Enter student ID: 45
Enter the marks by comma separated values: 44,55,66,77,4
Enter "no" to quit insertion: asdfa
Enter student ID: 12
Enter the marks by comma separated values: 45,45
Enter "no" to quit insertion: asdfg
12 is already inserted
Enter student ID: 23
Enter the marks by comma separated values: 45,45
Enter "no" to quit insertion: no
In [416]:
studentData
Out[416]:
{'12': ['56', '45', '13'],
 '45': ['44', '55', '66', '77', '4'],
 '23': ['45', '45']}
In [420]:
def getAvgMarks(D):
    avgMarks = {}
    for x in D:
        L = D[x]
        s = 0
        for marks in L:
            s += int(marks)
        avgMarks[x] = s/len(L)
    return avgMarks
In [422]:
avgM = getAvgMarks(studentData)
In [423]:
avgM
Out[423]:
{'12': 38.0, '45': 49.2, '23': 45.0}
In [424]:
for x in avgM:
    print("Student :", x, "got avg Marks as: ", avgM[x])
Student : 12 got avg Marks as:  38.0
Student : 45 got avg Marks as:  49.2
Student : 23 got avg Marks as:  45.0

Numpy¶

In [425]:
import numpy as np
In [443]:
a = np.array([1,2,3,5,7]) #can define data type as well / i = integer
In [434]:
b = np.array((2,3,5), dtype= 'f') # f = float
In [428]:
print(a)
[1 2 3 5 7]
In [429]:
type(a)
Out[429]:
numpy.ndarray
In [430]:
print(b)
[2 3 5]
In [431]:
type(b)
Out[431]:
numpy.ndarray
In [435]:
a.dtype #what's the data type of a?
Out[435]:
dtype('int32')
In [436]:
b.dtype #what's the data type of b?
Out[436]:
dtype('float32')
In [437]:
#Numpy(Dimension)
import numpy as np
a = np.array([[1,2,3],[4,5,6]]) #2 dimensional array
In [438]:
a.ndim
Out[438]:
2
In [439]:
a[0,2]
Out[439]:
3
In [452]:
B = np.array([[1,2,3],[2,4,5,9]])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[452], line 1
----> 1 B = np.array([[1,2,3],[2,4,5,9]])

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.
In [453]:
B.ndim
Out[453]:
1
In [449]:
C = np.array([[[1,2,3],[4,5,6],[0,0,-1]],[[-1,-2,-3],[-4,-5,-6],[0,0,1]]])
In [454]:
C.ndim
Out[454]:
3
In [455]:
C.shape # each 2 dimensional array has 3 array which have 3 items
Out[455]:
(2, 3, 3)
In [451]:
C[1,0,2]
Out[451]:
-3
In [459]:
C.shape[2]
Out[459]:
3
In [461]:
A = np.array([2]) # [] 
In [462]:
A.ndim
Out[462]:
1
In [463]:
B = np.array(3) # no array due to doenst have []
In [464]:
B.ndim
Out[464]:
0
In [465]:
C.size # total number of elements
Out[465]:
18
In [466]:
C.nbytes #how many totals number of bytes
Out[466]:
144
In [467]:
A = np.arange(100) #np.arange()
In [468]:
print(A)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
In [470]:
A = np.arange(20,100,3) #(start, last(excluded), size) like for i in range(20,100,3)
print(A)
[20 23 26 29 32 35 38 41 44 47 50 53 56 59 62 65 68 71 74 77 80 83 86 89
 92 95 98]
In [471]:
print(range(10)) #range() never create a list
range(0, 10)
In [472]:
print(list(range(10))) # if you want list then write list
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
In [473]:
A = np.random.permutation(np.arange(10)) #the arrage will be shown as randomly
print(A)
[7 1 3 6 2 9 0 4 8 5]
In [475]:
np.random.randint?
In [481]:
v = np.random.randint(20,30) #some random integer btw 20 and 30
In [482]:
type(v)
Out[482]:
int
In [483]:
print(v)
21
In [484]:
print(v)
21
In [485]:
A = np.random.rand(1000) # random number btw 0 to 999
In [487]:
import matplotlib.pyplot as plt
In [489]:
plt.hist(A, bins=100)
Out[489]:
(array([ 8., 11., 10., 12.,  8., 14., 11., 12.,  6., 10.,  4., 10.,  7.,
        14., 11.,  8.,  8.,  7.,  8., 16., 15.,  8.,  6.,  9., 14.,  9.,
        10., 16.,  7.,  9.,  8.,  7., 12., 11., 10., 14.,  7., 14., 11.,
         4., 11., 11.,  9., 15., 13.,  9., 10., 12., 12., 13., 10.,  7.,
        10.,  7.,  9.,  8., 10.,  8.,  5.,  9.,  6.,  6.,  8., 12.,  8.,
        17., 14.,  7.,  7., 10., 14.,  7.,  9., 12., 13., 14.,  5., 11.,
        10., 11., 14.,  9., 11., 16.,  8.,  7.,  8., 12., 18.,  7.,  4.,
         7., 13.,  9.,  8., 12., 11., 14., 11.,  6.]),
 array([1.81229342e-04, 1.01664234e-02, 2.01516174e-02, 3.01368115e-02,
        4.01220055e-02, 5.01071996e-02, 6.00923936e-02, 7.00775877e-02,
        8.00627817e-02, 9.00479758e-02, 1.00033170e-01, 1.10018364e-01,
        1.20003558e-01, 1.29988752e-01, 1.39973946e-01, 1.49959140e-01,
        1.59944334e-01, 1.69929528e-01, 1.79914722e-01, 1.89899916e-01,
        1.99885110e-01, 2.09870304e-01, 2.19855498e-01, 2.29840692e-01,
        2.39825886e-01, 2.49811081e-01, 2.59796275e-01, 2.69781469e-01,
        2.79766663e-01, 2.89751857e-01, 2.99737051e-01, 3.09722245e-01,
        3.19707439e-01, 3.29692633e-01, 3.39677827e-01, 3.49663021e-01,
        3.59648215e-01, 3.69633409e-01, 3.79618603e-01, 3.89603797e-01,
        3.99588991e-01, 4.09574185e-01, 4.19559379e-01, 4.29544573e-01,
        4.39529767e-01, 4.49514961e-01, 4.59500155e-01, 4.69485350e-01,
        4.79470544e-01, 4.89455738e-01, 4.99440932e-01, 5.09426126e-01,
        5.19411320e-01, 5.29396514e-01, 5.39381708e-01, 5.49366902e-01,
        5.59352096e-01, 5.69337290e-01, 5.79322484e-01, 5.89307678e-01,
        5.99292872e-01, 6.09278066e-01, 6.19263260e-01, 6.29248454e-01,
        6.39233648e-01, 6.49218842e-01, 6.59204036e-01, 6.69189230e-01,
        6.79174425e-01, 6.89159619e-01, 6.99144813e-01, 7.09130007e-01,
        7.19115201e-01, 7.29100395e-01, 7.39085589e-01, 7.49070783e-01,
        7.59055977e-01, 7.69041171e-01, 7.79026365e-01, 7.89011559e-01,
        7.98996753e-01, 8.08981947e-01, 8.18967141e-01, 8.28952335e-01,
        8.38937529e-01, 8.48922723e-01, 8.58907917e-01, 8.68893111e-01,
        8.78878305e-01, 8.88863499e-01, 8.98848694e-01, 9.08833888e-01,
        9.18819082e-01, 9.28804276e-01, 9.38789470e-01, 9.48774664e-01,
        9.58759858e-01, 9.68745052e-01, 9.78730246e-01, 9.88715440e-01,
        9.98700634e-01]),
 <BarContainer object of 100 artists>)
In [490]:
B = np.random.randn(10000)
plt.hist(B, bins=200)
Out[490]:
(array([  1.,   0.,   1.,   0.,   2.,   2.,   1.,   1.,   0.,   2.,   2.,
          0.,   0.,   5.,   4.,   1.,   5.,   2.,   5.,   3.,   4.,   7.,
          7.,   5.,   7.,   2.,  12.,   8.,   6.,  11.,  10.,  10.,  13.,
          9.,  11.,  23.,  19.,  24.,  17.,  18.,  30.,  37.,  29.,  31.,
         25.,  38.,  29.,  37.,  39.,  30.,  45.,  49.,  61.,  49.,  58.,
         54.,  65.,  44.,  78.,  64.,  68.,  82.,  73.,  85.,  82.,  84.,
         98.,  86., 104.,  88., 105., 103., 112., 102., 107., 129., 141.,
        148., 130., 130., 119., 134., 134., 132., 157., 146., 150., 144.,
        148., 151., 151., 170., 164., 145., 124., 148., 161., 146., 129.,
        133., 155., 151., 134., 134., 138., 124., 138., 119., 112., 119.,
        112., 116., 117., 109., 103., 112., 119.,  96.,  98.,  83.,  76.,
         87.,  71.,  79.,  67.,  64.,  58.,  62.,  52.,  70.,  66.,  51.,
         53.,  53.,  43.,  33.,  48.,  39.,  38.,  33.,  27.,  33.,  32.,
         22.,  21.,  22.,  13.,  13.,  21.,  15.,  12.,  16.,  17.,  10.,
         15.,   6.,   7.,   5.,   4.,  11.,   4.,   2.,   6.,   7.,   6.,
          3.,   0.,   3.,   2.,   1.,   3.,   4.,   0.,   2.,   1.,   1.,
          1.,   0.,   0.,   1.,   1.,   0.,   1.,   1.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   1.]),
 array([-3.46107023, -3.42369044, -3.38631066, -3.34893087, -3.31155109,
        -3.2741713 , -3.23679152, -3.19941173, -3.16203195, -3.12465216,
        -3.08727238, -3.0498926 , -3.01251281, -2.97513303, -2.93775324,
        -2.90037346, -2.86299367, -2.82561389, -2.7882341 , -2.75085432,
        -2.71347454, -2.67609475, -2.63871497, -2.60133518, -2.5639554 ,
        -2.52657561, -2.48919583, -2.45181604, -2.41443626, -2.37705647,
        -2.33967669, -2.30229691, -2.26491712, -2.22753734, -2.19015755,
        -2.15277777, -2.11539798, -2.0780182 , -2.04063841, -2.00325863,
        -1.96587884, -1.92849906, -1.89111928, -1.85373949, -1.81635971,
        -1.77897992, -1.74160014, -1.70422035, -1.66684057, -1.62946078,
        -1.592081  , -1.55470122, -1.51732143, -1.47994165, -1.44256186,
        -1.40518208, -1.36780229, -1.33042251, -1.29304272, -1.25566294,
        -1.21828315, -1.18090337, -1.14352359, -1.1061438 , -1.06876402,
        -1.03138423, -0.99400445, -0.95662466, -0.91924488, -0.88186509,
        -0.84448531, -0.80710552, -0.76972574, -0.73234596, -0.69496617,
        -0.65758639, -0.6202066 , -0.58282682, -0.54544703, -0.50806725,
        -0.47068746, -0.43330768, -0.3959279 , -0.35854811, -0.32116833,
        -0.28378854, -0.24640876, -0.20902897, -0.17164919, -0.1342694 ,
        -0.09688962, -0.05950983, -0.02213005,  0.01524973,  0.05262952,
         0.0900093 ,  0.12738909,  0.16476887,  0.20214866,  0.23952844,
         0.27690823,  0.31428801,  0.35166779,  0.38904758,  0.42642736,
         0.46380715,  0.50118693,  0.53856672,  0.5759465 ,  0.61332629,
         0.65070607,  0.68808586,  0.72546564,  0.76284542,  0.80022521,
         0.83760499,  0.87498478,  0.91236456,  0.94974435,  0.98712413,
         1.02450392,  1.0618837 ,  1.09926349,  1.13664327,  1.17402305,
         1.21140284,  1.24878262,  1.28616241,  1.32354219,  1.36092198,
         1.39830176,  1.43568155,  1.47306133,  1.51044111,  1.5478209 ,
         1.58520068,  1.62258047,  1.65996025,  1.69734004,  1.73471982,
         1.77209961,  1.80947939,  1.84685918,  1.88423896,  1.92161874,
         1.95899853,  1.99637831,  2.0337581 ,  2.07113788,  2.10851767,
         2.14589745,  2.18327724,  2.22065702,  2.25803681,  2.29541659,
         2.33279637,  2.37017616,  2.40755594,  2.44493573,  2.48231551,
         2.5196953 ,  2.55707508,  2.59445487,  2.63183465,  2.66921443,
         2.70659422,  2.743974  ,  2.78135379,  2.81873357,  2.85611336,
         2.89349314,  2.93087293,  2.96825271,  3.0056325 ,  3.04301228,
         3.08039206,  3.11777185,  3.15515163,  3.19253142,  3.2299112 ,
         3.26729099,  3.30467077,  3.34205056,  3.37943034,  3.41681013,
         3.45418991,  3.49156969,  3.52894948,  3.56632926,  3.60370905,
         3.64108883,  3.67846862,  3.7158484 ,  3.75322819,  3.79060797,
         3.82798775,  3.86536754,  3.90274732,  3.94012711,  3.97750689,
         4.01488668]),
 <BarContainer object of 200 artists>)
In [491]:
C = np.random.rand(2,3) #creat random dimensional array
In [492]:
C
Out[492]:
array([[0.28479749, 0.01496218, 0.21795265],
       [0.29856608, 0.95685224, 0.28837714]])
In [493]:
C.ndim
Out[493]:
2
In [494]:
C = np.random.rand(2,3,4,2)
In [495]:
C.ndim
Out[495]:
4
In [496]:
C
Out[496]:
array([[[[0.19808352, 0.90750045],
         [0.13427161, 0.90885532],
         [0.72531304, 0.67907578],
         [0.82546805, 0.83697186]],

        [[0.58072095, 0.80740941],
         [0.71044031, 0.89191532],
         [0.83809592, 0.12233693],
         [0.61687587, 0.40968328]],

        [[0.81907427, 0.41232916],
         [0.16349493, 0.43935106],
         [0.12297962, 0.00511371],
         [0.30436043, 0.38564601]]],


       [[[0.83505318, 0.75894666],
         [0.79148087, 0.04404956],
         [0.4473328 , 0.674635  ],
         [0.22660527, 0.51864064]],

        [[0.16631482, 0.44773438],
         [0.76406986, 0.98746421],
         [0.17695519, 0.62339607],
         [0.78282051, 0.74379242]],

        [[0.39200303, 0.76973911],
         [0.89048465, 0.62529171],
         [0.83263492, 0.4479187 ],
         [0.38048678, 0.84980509]]]])
In [497]:
D = np.arange(100).reshape(4,25) #reshape arrange following (a,b)
In [498]:
D.shape
Out[498]:
(4, 25)
In [499]:
D
Out[499]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
        66, 67, 68, 69, 70, 71, 72, 73, 74],
       [75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
        91, 92, 93, 94, 95, 96, 97, 98, 99]])
In [500]:
D = np.arange(100).reshape(4,5,5)
In [501]:
D.shape
Out[501]:
(4, 5, 5)
In [502]:
D
Out[502]:
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]],

       [[50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59],
        [60, 61, 62, 63, 64],
        [65, 66, 67, 68, 69],
        [70, 71, 72, 73, 74]],

       [[75, 76, 77, 78, 79],
        [80, 81, 82, 83, 84],
        [85, 86, 87, 88, 89],
        [90, 91, 92, 93, 94],
        [95, 96, 97, 98, 99]]])
In [503]:
np.zeros?
In [504]:
np.ones?
In [564]:
#Numpy(Slicing)
A = np.arange(100)
print(A)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
In [565]:
b = A[3:10]
print(b)
[3 4 5 6 7 8 9]
In [566]:
b[0] = -1200
In [567]:
b
Out[567]:
array([-1200,     4,     5,     6,     7,     8,     9])
In [568]:
A #it's also changed because it's in the same memory / That's the big difference btw ordinary list and slicing
Out[568]:
array([    0,     1,     2, -1200,     4,     5,     6,     7,     8,
           9,    10,    11,    12,    13,    14,    15,    16,    17,
          18,    19,    20,    21,    22,    23,    24,    25,    26,
          27,    28,    29,    30,    31,    32,    33,    34,    35,
          36,    37,    38,    39,    40,    41,    42,    43,    44,
          45,    46,    47,    48,    49,    50,    51,    52,    53,
          54,    55,    56,    57,    58,    59,    60,    61,    62,
          63,    64,    65,    66,    67,    68,    69,    70,    71,
          72,    73,    74,    75,    76,    77,    78,    79,    80,
          81,    82,    83,    84,    85,    86,    87,    88,    89,
          90,    91,    92,    93,    94,    95,    96,    97,    98,
          99])
In [569]:
b = A[3:10].copy() # if want not to change / it's in the different memory
In [570]:
A[::5]
Out[570]:
array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80,
       85, 90, 95])
In [571]:
A[::-5]
Out[571]:
array([99, 94, 89, 84, 79, 74, 69, 64, 59, 54, 49, 44, 39, 34, 29, 24, 19,
       14,  9,  4])
In [572]:
A[::-1]
Out[572]:
array([   99,    98,    97,    96,    95,    94,    93,    92,    91,
          90,    89,    88,    87,    86,    85,    84,    83,    82,
          81,    80,    79,    78,    77,    76,    75,    74,    73,
          72,    71,    70,    69,    68,    67,    66,    65,    64,
          63,    62,    61,    60,    59,    58,    57,    56,    55,
          54,    53,    52,    51,    50,    49,    48,    47,    46,
          45,    44,    43,    42,    41,    40,    39,    38,    37,
          36,    35,    34,    33,    32,    31,    30,    29,    28,
          27,    26,    25,    24,    23,    22,    21,    20,    19,
          18,    17,    16,    15,    14,    13,    12,    11,    10,
           9,     8,     7,     6,     5,     4, -1200,     2,     1,
           0])
In [573]:
#I want to find where is the position of -1200
B = (A == -1200)*np.arange(A.size)
print(B)
[0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
In [574]:
A.indices(-1200)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[574], line 1
----> 1 A.indices(-1200)

AttributeError: 'numpy.ndarray' object has no attribute 'indices'
In [575]:
idx = np.argwhere(A==-1200)[0][0]
In [535]:
 
In [576]:
idx
Out[576]:
3
In [577]:
A[idx] = 3
In [578]:
A
Out[578]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
In [579]:
A = np.round(10*np.random.rand(5,4))
In [544]:
#A = np.random.rand(5,4)
In [545]:
#A
Out[545]:
array([[0.4183266 , 0.01855416, 0.25106059, 0.86243359],
       [0.16981521, 0.31147646, 0.95850589, 0.55175295],
       [0.15552773, 0.10055072, 0.06009027, 0.82065224],
       [0.95136422, 0.74344842, 0.26952486, 0.02808109],
       [0.84601038, 0.01401601, 0.39694368, 0.2467477 ]])
In [580]:
A
Out[580]:
array([[ 4.,  1.,  5.,  5.],
       [ 3.,  1.,  7.,  4.],
       [ 2.,  8.,  0.,  7.],
       [ 3.,  0., 10., 10.],
       [ 6.,  8.,  1.,  7.]])
In [581]:
A[1,2]
Out[581]:
7.0
In [582]:
A[1,:] # for whole second row
Out[582]:
array([3., 1., 7., 4.])
In [583]:
A[:,1] # for whole second column
Out[583]:
array([1., 1., 8., 0., 8.])
In [585]:
Z = A[1:3,2:4] #row number 1 to row number 3 & column number 2 to column number 4
In [586]:
Z
Out[586]:
array([[7., 4.],
       [0., 7.]])
In [587]:
Z.T # transpose 행과 열 바꾸기
Out[587]:
array([[7., 0.],
       [4., 7.]])
In [588]:
import numpy.linalg as la #linear algebra library 역행렬구하기
In [589]:
la.inv(np.random.rand(3,3))
Out[589]:
array([[-0.82473966,  4.33580269, -0.46464079],
       [ 1.50081031,  0.34083488, -0.36621829],
       [-0.8377294 , -1.67459439,  1.4754175 ]])
In [597]:
Z
Out[597]:
array([[1., 5.],
       [5., 7.]])
In [598]:
Za = Z.sort(axis=0) #행 축으로 정렬 # 여기부터 다시 해보기 
In [599]:
Za
In [559]:
A.sort(axis=1) #열 축으로 정렬
In [560]:
A
Out[560]:
array([[0., 0., 1., 2.],
       [1., 1., 3., 3.],
       [2., 4., 4., 4.],
       [5., 6., 6., 8.],
       [7., 8., 8., 9.]])
In [600]:
#Numpy(More Indexing)
A = np.arange(100)
In [601]:
B = A[[3,5,6]]
In [602]:
B
Out[602]:
array([3, 5, 6])
In [605]:
B[0] = -4
In [606]:
B
Out[606]:
array([-4,  5,  6])
In [604]:
A # A doesn't change becuase we use B = A[[3,5,6]]
Out[604]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
In [607]:
B = A[A<40] # access all elements less than 40
In [608]:
B
Out[608]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39])
In [609]:
B = A[(A<40) & (A>30)] # btw 30 and 40
In [610]:
B
Out[610]:
array([31, 32, 33, 34, 35, 36, 37, 38, 39])
In [611]:
#& for list / and for single object
#/ for list / or for single object
#~ for list/ not for single object
In [616]:
#Numpy(Broadcasting)
A = np.round(10*np.random.rand(2,3))
In [617]:
A
Out[617]:
array([[2., 8., 3.],
       [5., 4., 1.]])
In [619]:
A+3
Out[619]:
array([[ 5., 11.,  6.],
       [ 8.,  7.,  4.]])
In [620]:
A+(np.arange(2).reshape(2,1)) 
Out[620]:
array([[2., 8., 3.],
       [6., 5., 2.]])
In [621]:
print(np.arange(2))
[0 1]
In [622]:
#stacks
B= np.round(10*np.random.rand(2,2))
In [623]:
A
Out[623]:
array([[2., 8., 3.],
       [5., 4., 1.]])
In [624]:
B
Out[624]:
array([[5., 9.],
       [4., 4.]])
In [625]:
C = np.hstack((A,B)) #가로로
In [626]:
C
Out[626]:
array([[2., 8., 3., 5., 9.],
       [5., 4., 1., 4., 4.]])
In [627]:
A = np.random.permutation(np.arange(10))
In [628]:
A
Out[628]:
array([2, 3, 8, 6, 4, 5, 7, 1, 0, 9])
In [629]:
A.sort() #ascending 오름차순
In [630]:
A
Out[630]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [631]:
np.sort(A)
Out[631]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [632]:
A.sort()
In [633]:
A=A[::-1] #descending 내림차순
In [634]:
A
Out[634]:
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
In [635]:
A = np.array(["abc",'howare you','u765','132r'])
In [636]:
A.sort() # possible in strings like alphabet order
In [637]:
A
Out[637]:
array(['132r', 'abc', 'howare you', 'u765'], dtype='<U10')
In [638]:
#Numpy(Speed: ufuncs)  numpy function is faster for large number/elements
B= np.random.rand(1000000)
%timeit sum(B)
%timeit np.sum(B) #same with B.sum()
38.4 ms ± 144 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
184 µs ± 1.86 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
In [639]:
def mySum(G):
    s = 0
    for x in G:
        s+=x
    return s
In [640]:
%timeit mySum(B)
43.5 ms ± 506 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

Pandas¶

In [641]:
import pandas as pd
In [643]:
print(pd.__version__)
1.5.3
In [644]:
A = pd.Series([2,3,4,5], index = ['a','b','c','d']) # Series handle one dimensional array
In [645]:
A.values
Out[645]:
array([2, 3, 4, 5])
In [646]:
type(A.values)
Out[646]:
numpy.ndarray
In [647]:
type(A)
Out[647]:
pandas.core.series.Series
In [648]:
A.index
Out[648]:
Index(['a', 'b', 'c', 'd'], dtype='object')
In [649]:
A['a']
Out[649]:
2
In [650]:
A['a':'c'] #using idex then the final idex is also included
Out[650]:
a    2
b    3
c    4
dtype: int64
In [651]:
#Pandas(Series) using dictionary
grads_dict={'A':4, 'B':3.5, 'C':3, 'D':2.5}
grads = pd.Series(grads_dict)
In [652]:
grads.values
Out[652]:
array([4. , 3.5, 3. , 2.5])
In [653]:
marks_dict = {'A':85, 'B':75, 'C':65, 'D':55}
marks = pd.Series(marks_dict)
In [654]:
marks
Out[654]:
A    85
B    75
C    65
D    55
dtype: int64
In [655]:
marks['A']
Out[655]:
85
In [656]:
marks[0:2]
Out[656]:
A    85
B    75
dtype: int64
In [657]:
#Pandas(DataFrame)
D = pd.DataFrame({'Marks':marks, 'Grades':grads})
In [658]:
D
Out[658]:
Marks Grades
A 85 4.0
B 75 3.5
C 65 3.0
D 55 2.5
In [659]:
D.T #can transpose as well
Out[659]:
A B C D
Marks 85.0 75.0 65.0 55.0
Grades 4.0 3.5 3.0 2.5
In [660]:
D
Out[660]:
Marks Grades
A 85 4.0
B 75 3.5
C 65 3.0
D 55 2.5
In [663]:
D.values
Out[663]:
array([[85. ,  4. ],
       [75. ,  3.5],
       [65. ,  3. ],
       [55. ,  2.5]])
In [664]:
D.values[2,0] #row #3 column #1
Out[664]:
65.0
In [665]:
D.columns
Out[665]:
Index(['Marks', 'Grades'], dtype='object')
In [666]:
D
Out[666]:
Marks Grades
A 85 4.0
B 75 3.5
C 65 3.0
D 55 2.5
In [667]:
D['ScaledMarks'] = 100*(D['Marks']/90) # want to add column
In [668]:
D
Out[668]:
Marks Grades ScaledMarks
A 85 4.0 94.444444
B 75 3.5 83.333333
C 65 3.0 72.222222
D 55 2.5 61.111111
In [669]:
del D['ScaledMarks'] #want to delete column
In [670]:
D
Out[670]:
Marks Grades
A 85 4.0
B 75 3.5
C 65 3.0
D 55 2.5
In [671]:
G = D[D['Marks']>70] # want to pick some data
In [672]:
G
Out[672]:
Marks Grades
A 85 4.0
B 75 3.5
In [675]:
#Pandas(NaN) -- deal with missing values (None)
A = pd.DataFrame([{'a':1, 'b':4}, {'b':-3, 'c':9}])
In [676]:
A
Out[676]:
a b c
0 1.0 4 NaN
1 NaN -3 9.0
In [677]:
A.fillna(0) #fill all na value as 0
Out[677]:
a b c
0 1.0 4 0.0
1 0.0 -3 9.0
In [680]:
A.dropna #drop all the missing values
Out[680]:
<bound method DataFrame.dropna of      a  b    c
0  1.0  4  NaN
1  NaN -3  9.0>
In [681]:
#Pandas(Indexing)
A = pd.Series(['a','b','c'], index = [1, 3, 5])
In [682]:
A[1]
Out[682]:
'a'
In [683]:
A[1:3]
Out[683]:
3    b
5    c
dtype: object
In [684]:
A.loc[1:3] #loc : use explicit indexes
Out[684]:
1    a
3    b
dtype: object
In [685]:
A.iloc[1:3] #iloc: use implicit indexes 기본적으로 첫 번째 값 위치 0
Out[685]:
3    b
5    c
dtype: object
In [686]:
D
Out[686]:
Marks Grades
A 85 4.0
B 75 3.5
C 65 3.0
D 55 2.5
In [687]:
D.iloc[2,:]
Out[687]:
Marks     65.0
Grades     3.0
Name: C, dtype: float64
In [688]:
D.iloc[::-1,:] # want to reverse all the values
Out[688]:
Marks Grades
D 55 2.5
C 65 3.0
B 75 3.5
A 85 4.0
In [689]:
#Pandas(csv files)
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
df = pd.read_csv('/Users/soyeonpark/Downloads/covid_19_data.csv')
In [692]:
df.head(10) #first 10 records
Out[692]:
SNo ObservationDate Province/State Country/Region Last Update Confirmed Deaths Recovered
0 1 01/22/2020 Anhui Mainland China 1/22/2020 17:00 1.0 0.0 0.0
1 2 01/22/2020 Beijing Mainland China 1/22/2020 17:00 14.0 0.0 0.0
2 3 01/22/2020 Chongqing Mainland China 1/22/2020 17:00 6.0 0.0 0.0
3 4 01/22/2020 Fujian Mainland China 1/22/2020 17:00 1.0 0.0 0.0
4 5 01/22/2020 Gansu Mainland China 1/22/2020 17:00 0.0 0.0 0.0
5 6 01/22/2020 Guangdong Mainland China 1/22/2020 17:00 26.0 0.0 0.0
6 7 01/22/2020 Guangxi Mainland China 1/22/2020 17:00 2.0 0.0 0.0
7 8 01/22/2020 Guizhou Mainland China 1/22/2020 17:00 1.0 0.0 0.0
8 9 01/22/2020 Hainan Mainland China 1/22/2020 17:00 4.0 0.0 0.0
9 10 01/22/2020 Hebei Mainland China 1/22/2020 17:00 1.0 0.0 0.0
In [693]:
df.drop(['SNo', 'Last Update'], axis = 1, inplace = True)
# want to delete column. 'axis=1' is do that on the column and 'implace=True' is apply this to df
In [694]:
df.head()
Out[694]:
ObservationDate Province/State Country/Region Confirmed Deaths Recovered
0 01/22/2020 Anhui Mainland China 1.0 0.0 0.0
1 01/22/2020 Beijing Mainland China 14.0 0.0 0.0
2 01/22/2020 Chongqing Mainland China 6.0 0.0 0.0
3 01/22/2020 Fujian Mainland China 1.0 0.0 0.0
4 01/22/2020 Gansu Mainland China 0.0 0.0 0.0
In [695]:
df.rename(columns = {'ObservationDate':'Date', 'Province/State':'Province', 'Country/Region':'Country'}, inplace = True)
#want to rename the column name
In [696]:
df.head()
Out[696]:
Date Province Country Confirmed Deaths Recovered
0 01/22/2020 Anhui Mainland China 1.0 0.0 0.0
1 01/22/2020 Beijing Mainland China 14.0 0.0 0.0
2 01/22/2020 Chongqing Mainland China 6.0 0.0 0.0
3 01/22/2020 Fujian Mainland China 1.0 0.0 0.0
4 01/22/2020 Gansu Mainland China 0.0 0.0 0.0
In [697]:
df['Date'] = pd.to_datetime(df['Date'])
#convert date format to use in Pandas
In [698]:
df.head()
Out[698]:
Date Province Country Confirmed Deaths Recovered
0 2020-01-22 Anhui Mainland China 1.0 0.0 0.0
1 2020-01-22 Beijing Mainland China 14.0 0.0 0.0
2 2020-01-22 Chongqing Mainland China 6.0 0.0 0.0
3 2020-01-22 Fujian Mainland China 1.0 0.0 0.0
4 2020-01-22 Gansu Mainland China 0.0 0.0 0.0
In [699]:
df.describe()
Out[699]:
Confirmed Deaths Recovered
count 3.064290e+05 306429.000000 3.064290e+05
mean 8.567091e+04 2036.403268 5.042029e+04
std 2.775516e+05 6410.938048 2.015124e+05
min -3.028440e+05 -178.000000 -8.544050e+05
25% 1.042000e+03 13.000000 1.100000e+01
50% 1.037500e+04 192.000000 1.751000e+03
75% 5.075200e+04 1322.000000 2.027000e+04
max 5.863138e+06 112385.000000 6.399531e+06
In [700]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 306429 entries, 0 to 306428
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   Date       306429 non-null  datetime64[ns]
 1   Province   228329 non-null  object        
 2   Country    306429 non-null  object        
 3   Confirmed  306429 non-null  float64       
 4   Deaths     306429 non-null  float64       
 5   Recovered  306429 non-null  float64       
dtypes: datetime64[ns](1), float64(3), object(2)
memory usage: 14.0+ MB
In [703]:
df = df.fillna('NA') # fill in the blank as NA
In [704]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 306429 entries, 0 to 306428
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   Date       306429 non-null  datetime64[ns]
 1   Province   306429 non-null  object        
 2   Country    306429 non-null  object        
 3   Confirmed  306429 non-null  float64       
 4   Deaths     306429 non-null  float64       
 5   Recovered  306429 non-null  float64       
dtypes: datetime64[ns](1), float64(3), object(2)
memory usage: 14.0+ MB
In [705]:
df.head(10)
Out[705]:
Date Province Country Confirmed Deaths Recovered
0 2020-01-22 Anhui Mainland China 1.0 0.0 0.0
1 2020-01-22 Beijing Mainland China 14.0 0.0 0.0
2 2020-01-22 Chongqing Mainland China 6.0 0.0 0.0
3 2020-01-22 Fujian Mainland China 1.0 0.0 0.0
4 2020-01-22 Gansu Mainland China 0.0 0.0 0.0
5 2020-01-22 Guangdong Mainland China 26.0 0.0 0.0
6 2020-01-22 Guangxi Mainland China 2.0 0.0 0.0
7 2020-01-22 Guizhou Mainland China 1.0 0.0 0.0
8 2020-01-22 Hainan Mainland China 4.0 0.0 0.0
9 2020-01-22 Hebei Mainland China 1.0 0.0 0.0
In [712]:
#EX:how many total confirmed case in each countries?
df2 = df.groupby('Country')[['Country','Confirmed','Deaths','Recovered']].sum(numeric_only=True).reset_index()
#Country 는 숫자 데이터가 아니라서 numeric_only=True 넣어줌
In [710]:
df2
Out[710]:
Country Confirmed Deaths Recovered
0 Azerbaijan 1.0 0.0 0.0
1 ('St. Martin',) 2.0 0.0 0.0
2 Afghanistan 17026442.0 669075.0 13464399.0
3 Albania 19768869.0 375955.0 13945256.0
4 Algeria 27684358.0 834464.0 18959299.0
... ... ... ... ...
224 West Bank and Gaza 41819444.0 440378.0 37003116.0
225 Yemen 962066.0 237613.0 506523.0
226 Zambia 13493953.0 205990.0 12625626.0
227 Zimbabwe 6484581.0 237234.0 5594887.0
228 occupied Palestinian territory 25.0 0.0 0.0

229 rows × 4 columns

In [714]:
#grupby 'Country' and 'Date'
df2 = df.groupby(['Country','Date'])[['Country','Date','Confirmed','Deaths','Recovered']].sum(numeric_only=True).reset_index()
In [715]:
df2
Out[715]:
Country Date Confirmed Deaths Recovered
0 Azerbaijan 2020-02-28 1.0 0.0 0.0
1 ('St. Martin',) 2020-03-10 2.0 0.0 0.0
2 Afghanistan 2020-02-24 1.0 0.0 0.0
3 Afghanistan 2020-02-25 1.0 0.0 0.0
4 Afghanistan 2020-02-26 1.0 0.0 0.0
... ... ... ... ... ...
87276 occupied Palestinian territory 2020-03-12 0.0 0.0 0.0
87277 occupied Palestinian territory 2020-03-14 0.0 0.0 0.0
87278 occupied Palestinian territory 2020-03-15 0.0 0.0 0.0
87279 occupied Palestinian territory 2020-03-16 0.0 0.0 0.0
87280 occupied Palestinian territory 2020-03-17 0.0 0.0 0.0

87281 rows × 5 columns

In [717]:
df3 = df2[df2['Confirmed']>100] #want case which have confirmed more than 100
In [718]:
df3
Out[718]:
Country Date Confirmed Deaths Recovered
35 Afghanistan 2020-03-28 107.0 4.0 2.0
36 Afghanistan 2020-03-29 118.0 4.0 2.0
37 Afghanistan 2020-03-30 146.0 4.0 2.0
38 Afghanistan 2020-03-31 175.0 4.0 5.0
39 Afghanistan 2020-04-01 197.0 4.0 5.0
... ... ... ... ... ...
87269 Zimbabwe 2021-05-25 38706.0 1587.0 36517.0
87270 Zimbabwe 2021-05-26 38819.0 1589.0 36531.0
87271 Zimbabwe 2021-05-27 38854.0 1592.0 36541.0
87272 Zimbabwe 2021-05-28 38918.0 1592.0 36563.0
87273 Zimbabwe 2021-05-29 38933.0 1594.0 36578.0

75055 rows × 5 columns

Matplotlib¶

In [719]:
import matplotlib.pyplot as plt
In [720]:
x = np.linspace(0,10,1000)
y = np.sin(x)
plt.plot(x,y)
Out[720]:
[<matplotlib.lines.Line2D at 0x16a730dd0>]
In [724]:
plt.scatter(x[::10],y[::10], color = 'red')
Out[724]:
<matplotlib.collections.PathCollection at 0x16a9261d0>
In [725]:
# can make two plot
plt.plot(x,y,color='b')
plt.plot(x,np.cos(x),color='g')
Out[725]:
[<matplotlib.lines.Line2D at 0x16aacddd0>]
In [809]:
#Project Covid 19 using matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
df = pd.read_csv('/Users/soyeonpark/Downloads/covid_19_data.csv')
In [810]:
df
Out[810]:
SNo ObservationDate Province/State Country/Region Last Update Confirmed Deaths Recovered
0 1 01/22/2020 Anhui Mainland China 1/22/2020 17:00 1.0 0.0 0.0
1 2 01/22/2020 Beijing Mainland China 1/22/2020 17:00 14.0 0.0 0.0
2 3 01/22/2020 Chongqing Mainland China 1/22/2020 17:00 6.0 0.0 0.0
3 4 01/22/2020 Fujian Mainland China 1/22/2020 17:00 1.0 0.0 0.0
4 5 01/22/2020 Gansu Mainland China 1/22/2020 17:00 0.0 0.0 0.0
... ... ... ... ... ... ... ... ...
306424 306425 05/29/2021 Zaporizhia Oblast Ukraine 2021-05-30 04:20:55 102641.0 2335.0 95289.0
306425 306426 05/29/2021 Zeeland Netherlands 2021-05-30 04:20:55 29147.0 245.0 0.0
306426 306427 05/29/2021 Zhejiang Mainland China 2021-05-30 04:20:55 1364.0 1.0 1324.0
306427 306428 05/29/2021 Zhytomyr Oblast Ukraine 2021-05-30 04:20:55 87550.0 1738.0 83790.0
306428 306429 05/29/2021 Zuid-Holland Netherlands 2021-05-30 04:20:55 391559.0 4252.0 0.0

306429 rows × 8 columns

In [811]:
df.drop(['SNo','Last Update'], axis=1, inplace=True)
In [812]:
df.rename(columns = {'ObservationDate':'Date', 'Province/State':'Province', 'Country/Region':'Country'}, inplace = True)
In [813]:
df.head()
Out[813]:
Date Province Country Confirmed Deaths Recovered
0 01/22/2020 Anhui Mainland China 1.0 0.0 0.0
1 01/22/2020 Beijing Mainland China 14.0 0.0 0.0
2 01/22/2020 Chongqing Mainland China 6.0 0.0 0.0
3 01/22/2020 Fujian Mainland China 1.0 0.0 0.0
4 01/22/2020 Gansu Mainland China 0.0 0.0 0.0
In [814]:
df['Date'] = pd.to_datetime(df['Date'])
In [815]:
df.head()
Out[815]:
Date Province Country Confirmed Deaths Recovered
0 2020-01-22 Anhui Mainland China 1.0 0.0 0.0
1 2020-01-22 Beijing Mainland China 14.0 0.0 0.0
2 2020-01-22 Chongqing Mainland China 6.0 0.0 0.0
3 2020-01-22 Fujian Mainland China 1.0 0.0 0.0
4 2020-01-22 Gansu Mainland China 0.0 0.0 0.0
In [816]:
imputer = SimpleImputer(strategy='constant') #missing values 결측치처리 좀 더 알아보기
df2 = pd.DataFrame(imputer.fit_transform(df),columns=df.columns)
In [798]:
df2
Out[798]:
Date Province Country Confirmed Deaths Recovered
0 2020-01-22 Anhui Mainland China 1.0 0.0 0.0
1 2020-01-22 Beijing Mainland China 14.0 0.0 0.0
2 2020-01-22 Chongqing Mainland China 6.0 0.0 0.0
3 2020-01-22 Fujian Mainland China 1.0 0.0 0.0
4 2020-01-22 Gansu Mainland China 0.0 0.0 0.0
... ... ... ... ... ... ...
306424 2021-05-29 Zaporizhia Oblast Ukraine 102641.0 2335.0 95289.0
306425 2021-05-29 Zeeland Netherlands 29147.0 245.0 0.0
306426 2021-05-29 Zhejiang Mainland China 1364.0 1.0 1324.0
306427 2021-05-29 Zhytomyr Oblast Ukraine 87550.0 1738.0 83790.0
306428 2021-05-29 Zuid-Holland Netherlands 391559.0 4252.0 0.0

306429 rows × 6 columns

In [819]:
# 왜 confirmed, deaths, recovered 가 안 나오지???
df3 = df2.groupby(['Country','Date'])[['Confirmed','Deaths','Recovered']].sum().reset_index() # only nemeric columns
In [820]:
df3.head()
Out[820]:
Country Date Confirmed Deaths Recovered
0 Azerbaijan 2020-02-28 1.0 0.0 0.0
1 ('St. Martin',) 2020-03-10 2.0 0.0 0.0
2 Afghanistan 2020-02-24 1.0 0.0 0.0
3 Afghanistan 2020-02-25 1.0 0.0 0.0
4 Afghanistan 2020-02-26 1.0 0.0 0.0
In [805]:
type(df2['Confirmed'])
Out[805]:
pandas.core.series.Series
In [821]:
countries = df3['Country'].unique()
len(countries)
Out[821]:
229
In [829]:
for idx in range(0,len(countries)):
    C = df3[df3['Country']==regions[idx]].reset_index()
    plt.scatter(np.arange(0,len(C)),C['Confirmed'],color ='b', label='Confirmed')
    plt.scatter(np.arange(0,len(C)),C['Recovered'],color ='g', label='Recovered')
    plt.scatter(np.arange(0,len(C)),C['Deaths'],color ='r', label='Deaths')
    plt.title(countries[idx])
    plt.xlabel('Days since the first suspect')
    plt.ylabel('Number of cases')
    plt.legend() #범례
    plt.show() #그래프 보여줘
In [831]:
df4 = df3.groupby(['Date'])[['Confirmed','Deaths','Recovered']].sum().reset_index()
In [832]:
df4.head
Out[832]:
<bound method NDFrame.head of           Date    Confirmed     Deaths    Recovered
0   2020-01-22        557.0       17.0         30.0
1   2020-01-23       1097.0       34.0         60.0
2   2020-01-24        941.0       26.0         39.0
3   2020-01-25       1437.0       42.0         42.0
4   2020-01-26       2118.0       56.0         56.0
..         ...          ...        ...          ...
489 2021-05-25  167848207.0  3485788.0  104824935.0
490 2021-05-26  168416423.0  3498544.0  105380748.0
491 2021-05-27  168970791.0  3511297.0  106078106.0
492 2021-05-28  169470725.0  3523117.0  106633069.0
493 2021-05-29  169951560.0  3533619.0  107140669.0

[494 rows x 4 columns]>
In [837]:
C = df4
plt.scatter(np.arange(0,len(C)),C['Confirmed'],color='b',label='Confirmed')
plt.scatter(np.arange(0,len(C)),C['Recovered'],color='g',label='Recovered')
plt.scatter(np.arange(0,len(C)),C['Deaths'],color='r',label='Deaths')
plt.title('world')
plt.xlabel('Days since the first suspect')
plt.ylabel('Number of cases')
plt.legend()
plt.show()
In [ ]: